library(data.table)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggpubr)
## Loading required package: ggplot2
library(Biostrings)
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.5
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     Filter, Find, Map, Position, Reduce, anyDuplicated, append,
##     as.data.frame, basename, cbind, colnames, dirname, do.call,
##     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
##     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following objects are masked from 'package:data.table':
## 
##     first, second
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:data.table':
## 
##     shift
## Loading required package: XVector
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble  3.1.4     ✔ stringr 1.4.0
## ✔ readr   2.0.1     ✔ forcats 0.5.1
## ✔ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ BiocGenerics::Position() masks ggplot2::Position(), base::Position()
## ✖ dplyr::between()         masks data.table::between()
## ✖ Biostrings::collapse()   masks IRanges::collapse(), dplyr::collapse()
## ✖ BiocGenerics::combine()  masks dplyr::combine()
## ✖ purrr::compact()         masks XVector::compact()
## ✖ IRanges::desc()          masks dplyr::desc()
## ✖ S4Vectors::expand()      masks tidyr::expand()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ S4Vectors::first()       masks dplyr::first(), data.table::first()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ dplyr::last()            masks data.table::last()
## ✖ purrr::reduce()          masks IRanges::reduce()
## ✖ S4Vectors::rename()      masks dplyr::rename()
## ✖ XVector::slice()         masks IRanges::slice(), dplyr::slice()
## ✖ purrr::transpose()       masks data.table::transpose()
library(foreach)
## 
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
library(stringdist)
## 
## Attaching package: 'stringdist'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(doParallel)
## Loading required package: iterators

Read in Wuhan binding dataset

WUHAN_PEPTIDES_BINDING_SCORES=readRDS("ORIGINAL_PEPTIDES_BINDING_SCORES_V2.rds")
WUHAN_PEPTIDES_BINDING_SCORES %>% head
## # A tibble: 6 × 15
##   Peptide      Predicted_Bindi… `Thalf(h)` `EL-score` EL_Rank `BA-score` BA_Rank
##   <chr>        <chr>            <chr>      <chr>      <chr>   <chr>      <chr>  
## 1 AADLDDFSKQL  HLA-A0101,HLA-A… 0.2226,0.… 0.0085,0.… 5.3632… 0.0571,0.… 19.165…
## 2 AAGLEAPFLY   HLA-A0101,HLA-A… 0.9656,0.… 0.2728,4e… 0.5874… 0.2922,0.… 0.7888…
## 3 AAGLEAPFLYLY HLA-A0101,HLA-A… 0.9528,0.… 0.0694,1e… 1.6217… 0.2495,0.… 1.0825…
## 4 AAISDYDYY    HLA-A0101,HLA-A… 1.0031,0.… 0.3568,4e… 0.4523… 0.3202,0.… 0.6298…
## 5 AAISDYDYYR   HLA-A0101,HLA-A… 0.3461,0.… 0.0039,4e… 8.4074… 0.1012,0.… 6.4802…
## 6 AAVYRINW     HLA-A0101,HLA-A… 0.3428,0.… 4e-04,0,0… 30.964… 0.0245,0.… 62.656…
## # … with 8 more variables: Ave <chr>, Binder <chr>, nM_41 <chr>, nM <chr>,
## #   Aff_Binder <chr>, Length <int>, HydrophobicCount <int>, HydroFraction <dbl>
WUHAN_PEPTIDES_BINDING_SCORES %>% nrow
## [1] 1380

Read in Omicron mutanta binding dataset

OMICRON_PEPTIDES_BINDING_SCORES=readRDS("OMICRON_PEPTIDES_BINDING_SCORES_V2.rds")
OMICRON_PEPTIDES_BINDING_SCORES %>% head
## # A tibble: 6 × 13
##   VariantAlignment MT_Predicted_Bind… `MT_Thalf(h)`   `MT_EL-score`  MT_EL_Rank 
##   <chr>            <chr>              <chr>           <chr>          <chr>      
## 1 AEYVNNSY         HLA-A0101,HLA-A02… 0.2295,0.1438,… 0.0412,0,1e-0… 2.2565,55.…
## 2 AKSHNITLIW       HLA-A0101,HLA-A02… 0.2395,0.1924,… 0.0048,1e-04,… 7.3782,47.…
## 3 ALRITFGGP        HLA-A0101,HLA-A02… 0.0843,0.1071,… 0,9e-04,0.004… 73.4615,18…
## 4 APGQTGNIA        HLA-A0101,HLA-A02… 0.0906,0.1022,… 5e-04,0,1e-04… 29.3103,52…
## 5 CNDPFLDHK        HLA-A0101,HLA-A02… 0.1508,0.093,0… 0.0105,0,1e-0… 4.7481,63.…
## 6 CNDPFLDHKN       HLA-A0101,HLA-A02… 0.0855,0.0757,… 2e-04,0,0,0,0… 46,90,95,9…
## # … with 8 more variables: MT_BA-score <chr>, MT_BA_Rank <chr>, MT_Ave <chr>,
## #   MT_Binder <chr>, MT_nM_41 <chr>, Predicted_Binding <chr>, MT_nM <chr>,
## #   MT_Aff_Binder <chr>
OMICRON_PEPTIDES_BINDING_SCORES %>% nrow
## [1] 81

Read in mutations

MUTATIONS = readRDS("OMICRON_EPITOPE_MUTATIONS.rds")
MUTATIONS %>% mutate(Length = nchar(Peptide))%>% filter(Protein == 'surface glycoprotein') %>% filter(Length== 9)%>% nrow
## [1] 28
#MUTATIONS=MUTATIONS %>% group_by(Peptide, VariantAlignment,start_pos,Mutation) %>% dplyr::summarise(Protein = paste0(Protein, collapse = ","))

join all

MUTATIONS %>% inner_join(WUHAN_PEPTIDES_BINDING_SCORES)%>% nrow
## Joining, by = "Peptide"
## [1] 80
MUTATIONS %>% inner_join(OMICRON_PEPTIDES_BINDING_SCORES) %>% nrow
## Joining, by = "VariantAlignment"
## [1] 80
FULL_MUTATIONS_DT=MUTATIONS %>% inner_join(WUHAN_PEPTIDES_BINDING_SCORES) %>% inner_join(OMICRON_PEPTIDES_BINDING_SCORES)
## Joining, by = "Peptide"
## Joining, by = c("VariantAlignment", "Predicted_Binding")
#FULL_MUTATIONS_DT %>% head %>% DT::datatable()

FULL_MUTATIONS_DT %>% filter(Peptide %in% "WLLWPVTLA")
## # A tibble: 1 × 32
##   Peptide   VariantAlignment start_pos Protein      MutationPos Mutation Hamming
##   <chr>     <chr>                <int> <chr>        <chr>       <chr>      <dbl>
## 1 WLLWPVTLA WLLWPVTLT               55 membrane gl… 63          A63T           1
## # … with 25 more variables: Predicted_Binding <chr>, Thalf(h) <chr>,
## #   EL-score <chr>, EL_Rank <chr>, BA-score <chr>, BA_Rank <chr>, Ave <chr>,
## #   Binder <chr>, nM_41 <chr>, nM <chr>, Aff_Binder <chr>, Length <int>,
## #   HydrophobicCount <int>, HydroFraction <dbl>, MT_Predicted_Binding <chr>,
## #   MT_Thalf(h) <chr>, MT_EL-score <chr>, MT_EL_Rank <chr>, MT_BA-score <chr>,
## #   MT_BA_Rank <chr>, MT_Ave <chr>, MT_Binder <chr>, MT_nM_41 <chr>,
## #   MT_nM <chr>, MT_Aff_Binder <chr>
FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment) %>% distinct()
## # A tibble: 80 × 2
##    Peptide    VariantAlignment
##    <chr>      <chr>           
##  1 AEHVNNSY   AEYVNNSY        
##  2 AKSHNIALIW AKSHNITLIW      
##  3 APGQTGKIA  APGQTGNIA       
##  4 APRITFGGP  ALRITFGGP       
##  5 CNDPFLGVY  CNDPFLDHK       
##  6 CNDPFLGVYY CNDPFLDHKN      
##  7 DGVYFASTEK DGVYFASIEK      
##  8 DSKVGGNYNY DSKVSGNYNY      
##  9 EELKKLLEQW EELKKLLEEW      
## 10 FCNDPFLGVY FCNDPFLDHK      
## # … with 70 more rows
AGRETOPICITY_DT=FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment, Protein, Predicted_Binding, nM_41, MT_nM_41,Binder,MT_Binder) %>%
        separate_rows_(cols = c("Predicted_Binding","nM_41","MT_nM_41","Binder","MT_Binder"),sep=",") %>% mutate(nM_41 = as.numeric(nM_41)) %>% mutate(MT_nM_41 = as.numeric(MT_nM_41))
AGRETOPICITY_DT %>% head
## # A tibble: 6 × 8
##   Peptide  VariantAlignment Protein     Predicted_Binding  nM_41 MT_nM_41 Binder
##   <chr>    <chr>            <chr>       <chr>              <dbl>    <dbl> <chr> 
## 1 AEHVNNSY AEYVNNSY         surface gl… HLA-A0101         17149.   14867. NONBI…
## 2 AEHVNNSY AEYVNNSY         surface gl… HLA-A0201         45116.   42972. NONBI…
## 3 AEHVNNSY AEYVNNSY         surface gl… HLA-A0202         43770.   41644. NONBI…
## 4 AEHVNNSY AEYVNNSY         surface gl… HLA-A0206         42925.   39324. NONBI…
## 5 AEHVNNSY AEYVNNSY         surface gl… HLA-A0211         44727.   42694. NONBI…
## 6 AEHVNNSY AEYVNNSY         surface gl… HLA-A0301         40402.   37172. NONBI…
## # … with 1 more variable: MT_Binder <chr>
AGRETOPICITY_DT=AGRETOPICITY_DT %>% mutate(Agretopicity = MT_nM_41/nM_41)

SPIKE_AGRETOPICITY=AGRETOPICITY_DT %>% filter(Protein == 'surface glycoprotein') %>% mutate(Length = nchar(Peptide))
SPIKE_AGRETOPICITY=SPIKE_AGRETOPICITY %>% filter(! (Binder == "NONBINDER" & MT_Binder == "NONBINDER"))
SPIKE_AGRETOPICITY %>% select(Peptide,VariantAlignment) %>% distinct() %>% nrow
## [1] 57
SPIKE_AGRETOPICITY %>% group_by(Peptide) %>% dplyr::summarise(n=n())
## # A tibble: 57 × 2
##    Peptide            n
##    <chr>          <int>
##  1 AEHVNNSY           3
##  2 APGQTGKIA          2
##  3 CNDPFLGVY          3
##  4 CNDPFLGVYY         3
##  5 DGVYFASTEK         4
##  6 DSKVGGNYNY         3
##  7 FCNDPFLGVY         8
##  8 FCNDPFLGVYY        2
##  9 FGEVFNATRF         6
## 10 FGEVFNATRFASVY     2
## # … with 47 more rows
SPIKE_AGRETOPICITY %>% nrow
## [1] 463
SPIKE_AGRETOPICITY %>% select(Peptide, nM_41, MT_nM_41)%>% distinct() %>% nrow
## [1] 445
ECDF_NM_PLT=SPIKE_AGRETOPICITY %>% select(Peptide, nM_41, MT_nM_41)%>% pivot_longer(cols = c(nM_41,MT_nM_41), names_to = "Dataset",values_to = "Aff")%>%
  mutate(Dataset = replace(Dataset, Dataset == "nM_41", "Wuhan"))%>%mutate(Dataset = replace(Dataset, Dataset == "MT_nM_41", "Omicron"))%>%
  mutate(Dataset = factor(Dataset, levels = c("Wuhan","Omicron")))%>%
        ggplot(aes(x=Aff, color=Dataset))+stat_ecdf(size=1)+theme_pubr(base_size = 22)+grids()+ guides(color = guide_legend(nrow = 2))+ylab("Cumulative Freq. of Peptides")+xlab("Binding Affinity\n(nM)")+geom_vline(xintercept = 500,linetype="dashed",alpha=0.5)


ECDF_NM_LOG10_PLT=SPIKE_AGRETOPICITY %>% select(Peptide, nM_41, MT_nM_41)%>% pivot_longer(cols = c(nM_41,MT_nM_41), names_to = "Dataset",values_to = "Aff")%>%
  mutate(Dataset = replace(Dataset, Dataset == "nM_41", "Wuhan"))%>%mutate(Dataset = replace(Dataset, Dataset == "MT_nM_41", "Omicron"))%>%
  mutate(Dataset = factor(Dataset, levels = c("Wuhan","Omicron")))%>%
        ggplot(aes(x=Aff, color=Dataset))+stat_ecdf(size=1)+theme_pubr(base_size = 22)+grids()+ guides(color = guide_legend(nrow = 2))+ylab("Cumulative Freq. of Peptides")+xlab("Log10 Binding Affinity\n(nM)")+geom_vline(xintercept = 500,linetype="dashed",alpha=0.5)+scale_x_log10()


VIOLIN_NM_PLT=SPIKE_AGRETOPICITY %>% select(Peptide, nM_41, MT_nM_41)%>% pivot_longer(cols = c(nM_41,MT_nM_41), names_to = "Dataset",values_to = "Aff") %>%
  mutate(Dataset = replace(Dataset, Dataset == "nM_41", "Wuhan"))%>%mutate(Dataset = replace(Dataset, Dataset == "MT_nM_41", "Omicron"))%>%
  mutate(Dataset = factor(Dataset, levels = c("Wuhan","Omicron")))%>%
         ggviolin(x="Dataset",y="Aff",color = "Dataset",add="boxplot",trim=TRUE)+scale_y_log10()+theme_pubr(base_size = 22)+stat_compare_means(label="p.signif",label.x.npc = "center")+ylab("Binding Affinity\n(nM)")

BA1: SUPERTYPES ASSOCIATION

AB_SUPERTYPES=fread("HLA_AB_SUPERTYPES.csv") %>% mutate(Allele = gsub("\\*","",Allele))%>% mutate(Allele = paste0("HLA-",Allele))%>% dplyr::rename(HLA_Allele = Allele)

DATA_FOR_ANALYSIS=FULL_MUTATIONS_DT %>% filter(Protein == 'surface glycoprotein')%>% select(Peptide, VariantAlignment, Predicted_Binding, nM_41, MT_nM_41, Binder,MT_Binder) %>% separate_rows_(cols = c("Predicted_Binding","nM_41","MT_nM_41","Binder","MT_Binder"),sep=",")%>% mutate(nM_41 = as.numeric(nM_41), MT_nM_41=as.numeric(MT_nM_41))%>% mutate(Agretopicity = MT_nM_41/nM_41)


HLA_A_B_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T)) %>% inner_join(AB_SUPERTYPES) %>%
filter(! (Binder == "NONBINDER" & MT_Binder == 'NONBINDER'))
## Joining, by = "HLA_Allele"
HLA_CDATASET_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(!HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T))%>% mutate(Supertype = str_extract(HLA_Allele,"HLA-C[0-9][0-9]"))%>%
        mutate(Supertype = gsub("HLA\\-","",Supertype))%>%
filter(! (Binder == "NONBINDER" & MT_Binder == 'NONBINDER'))

BA1_SUPERTYPE_PLT = HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>% dplyr::rename(Wuhan=nM_41, Omicron=MT_nM_41) %>%
        ggpaired(cond1 = "Wuhan", cond2 = "Omicron",line.color = "gray")+theme_pubr(base_size = 20)+facet_wrap(~Supertype,ncol=9)+scale_y_log10()+stat_compare_means(label = "p.signif",label.x.npc = "center")+rotate_x_text(angle=45)+ylab("Binding Affinity (nM)")

PRARR plots

DATA_FOR_SUPERTYPE_MUTPOSHLA=FULL_MUTATIONS_DT %>% mutate(Length = nchar(Peptide)) %>% separate_rows_(cols="MutationPos", sep=",") %>% select(Peptide, VariantAlignment, MutationPos, Predicted_Binding, Binder, MT_Binder, nM_41, MT_nM_41,Mutation)%>%
  separate_rows_(cols = "Mutation",sep=",")%>%
  separate_rows_(cols = c("Predicted_Binding","Binder","MT_Binder","nM_41","MT_nM_41"), sep=",")%>%mutate(MutationPos = as.numeric(MutationPos))%>%
    filter(! (Binder == "NONBINDER" & MT_Binder == "NONBINDER"))%>% mutate(nM_41 = as.numeric(nM_41), MT_nM_41=as.numeric(MT_nM_41))%>% mutate(Agretopicity = MT_nM_41/nM_41)%>% dplyr::rename(HLA_Allele=Predicted_Binding)

DATA_FOR_SUPERTYPE_MUTPOSHLA_C=DATA_FOR_SUPERTYPE_MUTPOSHLA%>% filter(!HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T))%>% mutate(Supertype = str_extract(HLA_Allele,"HLA-C[0-9][0-9]"))

DATA_FOR_SUPERTYPE_MUTPOSHLA_A_B=DATA_FOR_SUPERTYPE_MUTPOSHLA%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T)) %>% inner_join(AB_SUPERTYPES)
## Joining, by = "HLA_Allele"
DATA_FOR_SUPERTYPE_MUTPOSHLA_A_B %>% rbind(DATA_FOR_SUPERTYPE_MUTPOSHLA_C)%>% filter(! Mutation == "NA")%>%
    filter(Supertype == "B07")%>% mutate(MT_Binder = factor(MT_Binder, levels = c("NONBINDER","BINDER")))%>% arrange(desc(Agretopicity))%>%select(Peptide, VariantAlignment, HLA_Allele, MT_Binder, Mutation, Agretopicity)%>% distinct %>% DT::datatable()
PRARR_PER_MUT_PLT=DATA_FOR_SUPERTYPE_MUTPOSHLA_A_B %>% rbind(DATA_FOR_SUPERTYPE_MUTPOSHLA_C)%>% filter(! Mutation == "NA")%>%
    filter(Supertype == "B07")%>% mutate(MT_Binder = factor(MT_Binder, levels = c("NONBINDER","BINDER")))%>% arrange(desc(Agretopicity))%>%select(Peptide, VariantAlignment, HLA_Allele, MT_Binder, Mutation, Agretopicity)%>% distinct %>%
    mutate(pMHC = paste0(Peptide,"_", gsub("HLA-","",HLA_Allele)))%>%
    ggbarplot(x="pMHC",y="Agretopicity",fill="MT_Binder",position = position_dodge2())+theme_pubr(base_size = 20)+facet_grid(~Mutation,scales="free",space="free")+scale_y_log10()+rotate_x_text()+ylab("Agretopicity (log10)")+
  theme(strip.background = element_blank())
FULL_MUTATIONS_DT %>% filter(Peptide %in% grep("PRRA", Peptide, value=T))%>%
    select(Peptide, VariantAlignment, Predicted_Binding, nM_41, MT_nM_41, Binder, MT_Binder)%>%
    separate_rows_(cols = c("Predicted_Binding", "nM_41", "MT_nM_41", "Binder", "MT_Binder"),sep=",")%>%
    filter(! (Binder == "NONBINDER" & MT_Binder == "NONBINDER"))%>%mutate(nM_41=as.numeric(nM_41))%>% mutate(MT_nM_41=as.numeric(MT_nM_41))%>%
    mutate(Agretopicity = MT_nM_41/nM_41)%>% mutate(pMHC = paste0(Peptide,"_", gsub("HLA-","",Predicted_Binding)))%>%
    mutate(MT_Binder = factor(MT_Binder, levels = c("NONBINDER","BINDER")))%>%
    arrange(desc(Agretopicity))%>%
    dplyr::rename("Variant Peptide"=VariantAlignment, "HLA Allele"=Predicted_Binding, WT_BA=nM_41, MT_BA=MT_nM_41)%>%DT::datatable()
PRRAR_PMHC_PLT=FULL_MUTATIONS_DT %>% filter(Peptide %in% grep("PRRA", Peptide, value=T))%>%
    select(Peptide, VariantAlignment, Predicted_Binding, nM_41, MT_nM_41, Binder, MT_Binder)%>%
    separate_rows_(cols = c("Predicted_Binding", "nM_41", "MT_nM_41", "Binder", "MT_Binder"),sep=",")%>%
    filter(! (Binder == "NONBINDER" & MT_Binder == "NONBINDER"))%>%mutate(nM_41=as.numeric(nM_41))%>% mutate(MT_nM_41=as.numeric(MT_nM_41))%>%
    mutate(Agretopicity = MT_nM_41/nM_41)%>% mutate(pMHC = paste0(Peptide,"_", gsub("HLA-","",Predicted_Binding)))%>% mutate(MT_Binder = factor(MT_Binder, levels = c("NONBINDER","BINDER")))%>%
    arrange(desc(Agretopicity))%>%
    ggbarplot(x="pMHC",y="Agretopicity",fill="MT_Binder")+theme_pubr(base_size = 20)+rotate_x_text()+scale_y_log10()+ylab("Agretopicity (log10)")
NUM_HLAS_BIND_PEPS_PLT=FULL_MUTATIONS_DT %>% select(Peptide,Binder, Predicted_Binding, MT_Binder)%>% separate_rows_(cols = c("Binder","Predicted_Binding","MT_Binder"),sep=",")%>% filter(! (Binder == "NONBINDER" & MT_Binder == "NONBINDER"))%>%
    pivot_longer(cols = c(Binder,MT_Binder))%>% group_by(Peptide, name,value)%>% dplyr::summarise(n=n())%>% filter(! value == "NONBINDER")%>% arrange(desc(name), desc(n))%>%
    dplyr::rename(Dataset=name)%>%mutate(Dataset = replace(Dataset, Dataset == "MT_Binder", "Omicron_Binders"))%>%mutate(Dataset = replace(Dataset, Dataset == "Binder", "Wuhan_Binders"))%>%
    mutate(Dataset = factor(Dataset, levels = c("Wuhan_Binders","Omicron_Binders")))%>%
    ggbarplot(x="Peptide",y="n", fill="Dataset",position = position_dodge())+theme_pubr(base_size = 20)+rotate_x_text()+ylab("# HLAs predicted to bind")
## `summarise()` has grouped output by 'Peptide', 'name'. You can override using the `.groups` argument.
SUBVAR_ECDF_PLT=readRDS("SUBVAR_ECDF_PLT.rds")+ylab("Cumulative Freq. of Peptides")+theme_pubr(base_size = 22)
SUBVAR_VIOLIN_PLT = readRDS("SUBVAR_VIOLIN_PLT.rds")+theme_pubr(base_size = 22)
SUBVAR_SUPERTYPE_BOXPLT = readRDS("SUBVAR_SUPERTYPE_BOXPLT.rds")+theme_pubr(base_size = 22)

BA1_SUPERTYPE_PLT = HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>% dplyr::rename(Wuhan=nM_41, Omicron=MT_nM_41) %>% filter(Supertype %in% c("A02","A03","B07","C01"))%>%
        ggpaired(cond1 = "Wuhan", cond2 = "Omicron",line.color = "gray")+theme_pubr(base_size = 20)+facet_wrap(~Supertype,ncol=9)+scale_y_log10()+stat_compare_means(label = "p.signif",label.x.npc = "center")+rotate_x_text(angle=45)+ylab("Binding Affinity (nM)")
A_GRID=cowplot::plot_grid(ECDF_NM_PLT, ECDF_NM_LOG10_PLT, VIOLIN_NM_PLT, align="hv",nrow=1)

B_GRID_i=cowplot::plot_grid(SUBVAR_ECDF_PLT,SUBVAR_VIOLIN_PLT, align="hv",nrow=1,axis="blt")
B_GRID_ii=cowplot::plot_grid(SUBVAR_SUPERTYPE_BOXPLT, align="hv",nrow=1,axis="blt")

C_GRID = cowplot::plot_grid(PRARR_PER_MUT_PLT, nrow=1, align="hv", axis="bl")
D_F_GRID = cowplot::plot_grid( PRRAR_PMHC_PLT , nrow=1,align="hv",axis="bl")
E_GRID = cowplot::plot_grid( PRRAR_PMHC_PLT+labs(fill=""),NUM_HLAS_BIND_PEPS_PLT , nrow=1, rel_widths = c(0.20,0.8))

V3

cowplot::plot_grid(A_GRID, B_GRID_i,B_GRID_ii, C_GRID, E_GRID, nrow =5, rel_heights = c(0.8,0.7,1.7,1.1,0.9), align="v", axis="blt")

Supplementary

BA1 SPIKE

AGRETOPICITY_DT=FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment, Protein, Predicted_Binding, BA_Rank, MT_BA_Rank,Binder,MT_Binder) %>%
        separate_rows_(cols = c("Predicted_Binding","BA_Rank","MT_BA_Rank","Binder","MT_Binder"),sep=",") %>% mutate(BA_Rank = as.numeric(BA_Rank)) %>% mutate(MT_BA_Rank = as.numeric(MT_BA_Rank))

SPIKE_AGRETOPICITY=AGRETOPICITY_DT %>% filter(Protein == 'surface glycoprotein') %>% mutate(Length = nchar(Peptide))


SPIKE_AGRETOPICITY=SPIKE_AGRETOPICITY %>% filter(! (Binder == "NONBINDER" & MT_Binder == "NONBINDER"))

SPIKE_ECDF_NM_PLT=SPIKE_AGRETOPICITY %>% select(Peptide, BA_Rank, MT_BA_Rank)%>% pivot_longer(cols = c(BA_Rank, MT_BA_Rank), names_to = "Dataset",values_to = "Aff")%>%
  mutate(Dataset = replace(Dataset, Dataset == "BA_Rank", "Wuhan"))%>%mutate(Dataset = replace(Dataset, Dataset == "MT_BA_Rank", "Omicron"))%>%
  mutate(Dataset = factor(Dataset, levels = c("Wuhan","Omicron")))%>%
        ggplot(aes(x=Aff, color=Dataset))+stat_ecdf(size=1)+theme_pubr(base_size = 20)+grids()+ guides(color = guide_legend(nrow = 2))+ylab("Cumulative Freq. of Peptides")+xlab("Binding Affinity\n(Rank)")+geom_vline(xintercept = 2,linetype="dashed",alpha=0.5)


SPIKE_ECDF_NM_LOG10_PLT=SPIKE_AGRETOPICITY %>% select(Peptide, BA_Rank, MT_BA_Rank)%>% pivot_longer(cols = c(BA_Rank, MT_BA_Rank), names_to = "Dataset",values_to = "Aff")%>%
  mutate(Dataset = replace(Dataset, Dataset == "BA_Rank", "Wuhan"))%>%mutate(Dataset = replace(Dataset, Dataset == "MT_BA_Rank", "Omicron"))%>%
  mutate(Dataset = factor(Dataset, levels = c("Wuhan","Omicron")))%>%
        ggplot(aes(x=Aff, color=Dataset))+stat_ecdf(size=1)+theme_pubr(base_size = 20)+grids()+ guides(color = guide_legend(nrow = 2))+ylab("Cumulative Freq. of Peptides")+xlab("Log10 Binding Affinity\n(Rank)")+geom_vline(xintercept = 2,linetype="dashed",alpha=0.5)+scale_x_log10()


SPIKE_VIOLIN_NM_PLT=SPIKE_AGRETOPICITY %>% select(Peptide, BA_Rank, MT_BA_Rank)%>% pivot_longer(cols = c(BA_Rank, MT_BA_Rank), names_to = "Dataset",values_to = "Aff") %>%
  mutate(Dataset = replace(Dataset, Dataset == "BA_Rank", "Wuhan"))%>%mutate(Dataset = replace(Dataset, Dataset == "MT_BA_Rank", "Omicron"))%>%
  mutate(Dataset = factor(Dataset, levels = c("Wuhan","Omicron")))%>%
         ggviolin(x="Dataset",y="Aff",color = "Dataset",add="boxplot",trim=TRUE)+scale_y_log10()+theme_pubr(base_size = 20)+stat_compare_means(label="p.signif",label.x.npc = "center")+ylab("Binding Affinity\n(Rank)")+geom_hline(yintercept = 2,linetype="dashed",alpha=0.5)

BA1 ALL PROTEINS

AGRETOPICITY_DT=FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment, Protein, Predicted_Binding, BA_Rank, MT_BA_Rank,Binder,MT_Binder) %>%
        separate_rows_(cols = c("Predicted_Binding","BA_Rank","MT_BA_Rank","Binder","MT_Binder"),sep=",") %>% mutate(BA_Rank = as.numeric(BA_Rank)) %>% mutate(MT_BA_Rank = as.numeric(MT_BA_Rank))


AGRETOPICITY_DT=AGRETOPICITY_DT %>% filter(! (Binder == "NONBINDER" & MT_Binder == "NONBINDER"))

ECDF_NM_PLT=AGRETOPICITY_DT %>% select(Peptide, BA_Rank, MT_BA_Rank)%>% pivot_longer(cols = c(BA_Rank, MT_BA_Rank), names_to = "Dataset",values_to = "Aff")%>%
  mutate(Dataset = replace(Dataset, Dataset == "BA_Rank", "Wuhan"))%>%mutate(Dataset = replace(Dataset, Dataset == "MT_BA_Rank", "Omicron"))%>%
  mutate(Dataset = factor(Dataset, levels = c("Wuhan","Omicron")))%>%
        ggplot(aes(x=Aff, color=Dataset))+stat_ecdf(size=1)+theme_pubr(base_size = 20)+grids()+ guides(color = guide_legend(nrow = 2))+ylab("Cumulative Freq. of Peptides")+xlab("Binding Affinity\n(Rank)")+geom_vline(xintercept = 2,linetype="dashed",alpha=0.5)

ECDF_NM_LOG10_PLT=AGRETOPICITY_DT %>% select(Peptide, BA_Rank, MT_BA_Rank)%>% pivot_longer(cols = c(BA_Rank, MT_BA_Rank), names_to = "Dataset",values_to = "Aff")%>%
  mutate(Dataset = replace(Dataset, Dataset == "BA_Rank", "Wuhan"))%>%mutate(Dataset = replace(Dataset, Dataset == "MT_BA_Rank", "Omicron"))%>%
  mutate(Dataset = factor(Dataset, levels = c("Wuhan","Omicron")))%>%
        ggplot(aes(x=Aff, color=Dataset))+stat_ecdf(size=1)+theme_pubr(base_size = 20)+grids()+ guides(color = guide_legend(nrow = 2))+ylab("Cumulative Freq. of Peptides")+xlab("Log10 Binding Affinity\n(Rank)")+geom_vline(xintercept = 2,linetype="dashed",alpha=0.5)+scale_x_log10()

VIOLIN_NM_PLT=AGRETOPICITY_DT %>% select(Peptide, BA_Rank, MT_BA_Rank)%>% pivot_longer(cols = c(BA_Rank, MT_BA_Rank), names_to = "Dataset",values_to = "Aff") %>%
  mutate(Dataset = replace(Dataset, Dataset == "BA_Rank", "Wuhan"))%>%mutate(Dataset = replace(Dataset, Dataset == "MT_BA_Rank", "Omicron"))%>%
  mutate(Dataset = factor(Dataset, levels = c("Wuhan","Omicron")))%>%
         ggviolin(x="Dataset",y="Aff",color = "Dataset",add="boxplot",trim=TRUE)+scale_y_log10()+theme_pubr(base_size = 20)+stat_compare_means(label="p.signif",label.x.npc = "center")+ylab("Binding Affinity\n(Rank)")+geom_hline(yintercept = 2,linetype="dashed",alpha=0.5)

subvar spike

SUBVAR_VIOLIN_PLT_RANK=readRDS( "SUBVAR_RANK_VIOLIN_PLT.rds")

SUBVAR_ECDF_PLT_RANK=readRDS( "SUBVAR_RANK_ECDF_PLT.rds")
SUBVAR_ECDF_PLT_RANK=SUBVAR_ECDF_PLT_RANK+xlab("Log 10 Binding Affinity\n(Rank)")

all proteins subvar

SUBVAR_VIOLIN_ALLPROT_PLT = readRDS( "SUBVAR_RANK_ALL_VIOLIN_PLT.rds")

SUBVAR_ECDF_ALL_PLT= readRDS( "SUBVAR_RANK_ALL_ECDF_PLT.rds")
SUBVAR_ECDF_ALL_PLT=SUBVAR_ECDF_ALL_PLT+xlab("Log 10 Binding Affinity\n(Rank)")
SPIKE_PAIRED_EPITOPE_PLT=FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment, Predicted_Binding, Binder,MT_Binder, BA_Rank, MT_BA_Rank, Protein) %>% separate_rows_(c("Predicted_Binding","Binder","BA_Rank","MT_Binder","MT_BA_Rank","Protein"),sep=",")%>%ungroup()%>%
        filter(! (Binder == "NONBINDER" & MT_Binder == "NONBINDER")) %>% mutate(BA_Rank = as.numeric(BA_Rank))%>% mutate(MT_BA_Rank = as.numeric(MT_BA_Rank))%>%filter(Protein == 'surface glycoprotein')%>%dplyr::rename(Wuhan=BA_Rank, Omicron=MT_BA_Rank)%>%
  ggpaired(cond1 = "Wuhan", cond2 = "Omicron")+facet_wrap(~Peptide)+theme_pubr(base_size = 20)+stat_compare_means(label = "p.signif",label.x.npc = "center")+scale_y_log10()+ggtitle("Spike")+ylab("BA Rank")
NON_SPIKE_PAIRED_EPITOPE_PLT=FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment, Predicted_Binding, Binder,MT_Binder, BA_Rank, MT_BA_Rank, Protein) %>% separate_rows_(c("Predicted_Binding","Binder","BA_Rank","MT_Binder","MT_BA_Rank","Protein"),sep=",")%>%ungroup()%>%
        filter(! (Binder == "NONBINDER" & MT_Binder == "NONBINDER")) %>% mutate(BA_Rank = as.numeric(BA_Rank))%>%
  mutate(MT_BA_Rank = as.numeric(MT_BA_Rank))%>%filter(!Protein == 'surface glycoprotein')%>%
  dplyr::rename(Wuhan=BA_Rank, Omicron=MT_BA_Rank)%>%
  ggpaired(cond1 = "Wuhan", cond2 = "Omicron")+facet_wrap(~Peptide,ncol=8)+theme_pubr(base_size = 20)+stat_compare_means(label = "p.signif",label.x.npc = "center")+scale_y_log10()+ggtitle("Non-spike Proteins")+ylab("BA Rank")

#BA1 SUPERTYPES

BA1_SUPERTYPE_BOXPLT=readRDS(file="BA1_SUPERTYPE_BOXPLT.rds")

Supp Fig: Page 1

GRID_1 = cowplot::plot_grid(SPIKE_ECDF_NM_PLT, SPIKE_ECDF_NM_LOG10_PLT, SPIKE_VIOLIN_NM_PLT, nrow=1)
GRID_2 = cowplot::plot_grid(ECDF_NM_PLT, ECDF_NM_LOG10_PLT, VIOLIN_NM_PLT, nrow=1)
GRID_3 = cowplot::plot_grid(SUBVAR_ECDF_PLT_RANK,SUBVAR_VIOLIN_PLT_RANK, nrow=1)
GRID_4 = cowplot::plot_grid(SUBVAR_ECDF_ALL_PLT, SUBVAR_VIOLIN_ALLPROT_PLT, nrow=1)
GRID_5 = cowplot::plot_grid(BA1_SUPERTYPE_BOXPLT, nrow=1)
cowplot::plot_grid(GRID_1,GRID_2,GRID_3, GRID_4,GRID_5,align="hv",axis="l",nrow=5)

page 2

EMPTY=NULL

cowplot::plot_grid(SPIKE_PAIRED_EPITOPE_PLT, NON_SPIKE_PAIRED_EPITOPE_PLT, nrow=2, rel_heights = c(1.3,0.7))